
## @knitr CLUSTER_SELECTION_SETUP

rm(list=ls())


# from \url{https://www.r-bloggers.com/identifying-the-os-from-r/}
get_os <- function(){
  sysinf <- Sys.info()
  if (!is.null(sysinf)){
    os <- sysinf['sysname']
    if (os == 'Darwin')
      os <- "osx"
  } else { ## mystery machine
    os <- .Platform$OS.type
    if (grepl("^darwin", R.version$os))
      os <- "osx"
    if (grepl("linux-gnu", R.version$os))
      os <- "linux"
  }
  tolower(os)
}

get_machine  <- function(){
    sysinf  <- Sys.info()
    if (!is.null(sysinf)){
        my.machine  <- sysinf['nodename']
    }
    tolower(my.machine)
}


if(get_os()=="windows" & get_machine()=="mompracen") {
    path <- setwd('C:/Users/giaco/Documents/MEGAsync/')
} else if (get_os()=="windows" & get_machine()!="mompracen") {
    path <- setwd('C:/Users/grieco/Dropbox/')
} else if (get_machine()=="aramis") {
    path <- setwd('/home/giacomo68/Dropbox/')
} else {
    path <- setwd('/home/giacomo/Dropbox/')
}


library(survival)
library(xtable)
library(ggplot2)
library(english)
library(Publish)
library(showtext)
library(extrafont)
library(mice)
library(mitools)
library(sandwich)
library(lmtest)
library(ggpubr)
library(gridExtra)

Data <- read.csv('gc-jg-project/kampala/data/data-rome.csv',
                 stringsAsFactors=FALSE)

Data$t1.rome2 <- Data$t1.rome*Data$t1.rome/100000
Data$t1.rome3 <- Data$t1.rome2*Data$t1.rome/100000

Data$country_id <- as.numeric(factor(Data$country_name))

Data$row.num <- rownames(Data)

Data.icc <- read.csv('gc-jg-project/kampala/data/data-kampala.csv',
                     stringsAsFactors=FALSE)

Data.icc$country_id <- as.numeric(factor(Data.icc$country_name))

m <- 10

imp <- mice(Data[,c("t.rome",
                 "sidea.1991",
                 "sideb.1991",
                 "my.mil.cap.lag",
                 "v2x_polyarchy.lag",
                 "v2x_libdem.lag",     
                 "v2x_partipdem.lag",
                 "v2x_delibdem.lag", 
                 "v2x_egaldem.lag",
                 "gdp.cap.lag",
                 "pop.lag",
                 "common.law",
                 "year",
                 "rule.law.lag",
                 "civ.war.1990",
                 "country_id")], m = m, maxit = 10, seed = 1234)

models <- list()
model_results <- list()
merged_imputations <- list()
l1.model_results <- list()
l2.model_results <- list()
l3.model_results <- list()
l4.model_results <- list()
l5.model_results <- list()
l6.model_results <- list()
l1 <- list()
l2 <- list()
l3 <- list()
l4 <- list()
l5 <- list()
l6 <- list()
l1.fitted <- list()
l2.fitted <- list()
l3.fitted <- list()
l4.fitted <- list()
l5.fitted <- list()
l6.fitted <- list()
l1.vcov_cluster <- list()
l2.vcov_cluster <- list()
l3.vcov_cluster <- list()
l4.vcov_cluster <- list()
l5.vcov_cluster <- list()
l6.vcov_cluster <- list()


for (i in 1:m) {
    complete_data <- complete(imp, i)

    complete_data$.imp <- i
    
    merged_data <- merge(complete_data, 
                         Data[,c("country_id","year","country_name",
                                 "sidea.1945",
                                 "sideb.1945",
                                 "sidea.force.1945",
                                 "sideb.force.1945",
                                 "sidea.no.force.1945",
                                 "sideb.no.force.1945",
                                 "sidea.force.1991",
                                 "sideb.force.1991",
                                 "sidea.no.force.1991",
                                 "sideb.no.force.1991",
                                 "t1.rome", 
                                 "t1.rome2",
                                 "t1.rome3",
                                 "row.num"
                                 )], 
                        by = c("country_id", "year"),  # Note: quoted variable names
                        all.x = TRUE)  # Keep all rows from complete_data
    
    merged_imputations[[i]] <- merged_data
        
    current_data <- merged_imputations[[i]]

    l1[[i]] <- glm(t.rome~
                  sidea.1991
              + sideb.1991
              + my.mil.cap.lag
              + v2x_libdem.lag
              + log(gdp.cap.lag)
              + log(pop.lag)
              + civ.war.1990
              + civ.war.1990*v2x_libdem.lag
              + common.law
              + t1.rome + t1.rome2 + t1.rome3
             ,data=current_data,
              family="binomial")


    
    l1.vcov_cluster[[i]] <- vcovCL(l1[[i]], cluster = current_data$country_id,
                                   type = "HC1")

     l1.model_results[[i]] <- list(
        coefficients = coef(l1[[i]]),
        variance = l1.vcov_cluster[[i]],
        n = nrow(current_data)
       )
    
    l1.fitted[[i]] <- as.data.frame(l1[[i]]$fitted.values)
    colnames(l1.fitted[[i]]) <- "fitted.values"
    l1.fitted[[i]]$row.num <- rownames(l1.fitted[[i]])


    l2[[i]] <- glm(t.rome~
                  sidea.1945
              + sideb.1945
              + my.mil.cap.lag
              + v2x_libdem.lag
              + log(gdp.cap.lag)
              + log(pop.lag)
              + civ.war.1990
              + civ.war.1990*v2x_libdem.lag
              + common.law
              + t1.rome + t1.rome2 + t1.rome3
             ,data=current_data,
              family="binomial")

    
    l2.fitted[[i]] <- as.data.frame(l2[[i]]$fitted.values)
    colnames(l2.fitted[[i]]) <- "fitted.values"
    l2.fitted[[i]]$row.num <- rownames(l2.fitted[[i]])

    l2.vcov_cluster[[i]] <- vcovCL(l2[[i]], cluster = current_data$country_id,
                                   type = "HC1")

     l2.model_results[[i]] <- list(
        coefficients = coef(l2[[i]]),
        variance = l2.vcov_cluster[[i]],
        n = nrow(current_data)
       )


    l3[[i]] <- glm(t.rome~
                       sidea.force.1991
                   + sideb.force.1991
                   + my.mil.cap.lag
                   + v2x_libdem.lag
                   + log(gdp.cap.lag)
                   + log(pop.lag)
                   + civ.war.1990
                   + civ.war.1990*v2x_libdem.lag
                   + common.law
                   + t1.rome + t1.rome2 + t1.rome3
                  ,data=current_data,
                   family="binomial")

    l3.fitted[[i]] <- as.data.frame(l3[[i]]$fitted.values)
    colnames(l3.fitted[[i]]) <- "fitted.values"
    l3.fitted[[i]]$row.num <- rownames(l3.fitted[[i]])

    l3.vcov_cluster[[i]] <- vcovCL(l3[[i]], cluster = current_data$country_id,
                                   type = "HC1")

     l3.model_results[[i]] <- list(
        coefficients = coef(l3[[i]]),
        variance = l3.vcov_cluster[[i]],
        n = nrow(current_data)
       )


    
    l4[[i]] <- glm(t.rome~
                  sidea.force.1945
              + sideb.force.1945
              + my.mil.cap.lag
              + v2x_libdem.lag
              + log(gdp.cap.lag)
              + log(pop.lag)
              + civ.war.1990
              + civ.war.1990*v2x_libdem.lag
              + common.law
              + t1.rome + t1.rome2 + t1.rome3
             ,data=current_data,
              family="binomial")

    
    l4.fitted[[i]] <- as.data.frame(l4[[i]]$fitted.values)
    colnames(l4.fitted[[i]]) <- "fitted.values"
    l4.fitted[[i]]$row.num <- rownames(l4.fitted[[i]])

    l4.vcov_cluster[[i]] <- vcovCL(l4[[i]], cluster = current_data$country_id,
                                   type = "HC1")

     l4.model_results[[i]] <- list(
        coefficients = coef(l4[[i]]),
        variance = l4.vcov_cluster[[i]],
        n = nrow(current_data)
       )


    l5[[i]] <- glm(t.rome~
                       sidea.no.force.1991
                   + sideb.no.force.1991
                   + my.mil.cap.lag
                   + v2x_libdem.lag
                   + log(gdp.cap.lag)
                   + log(pop.lag)
                   + civ.war.1990
                   + civ.war.1990*v2x_libdem.lag
                   + common.law
                   + t1.rome + t1.rome2 + t1.rome3
                  ,data=current_data,
                   family="binomial")

    
    l5.fitted[[i]] <- as.data.frame(l5[[i]]$fitted.values)
    colnames(l5.fitted[[i]]) <- "fitted.values"
    l5.fitted[[i]]$row.num <- rownames(l5.fitted[[i]])

    l5.vcov_cluster[[i]] <- vcovCL(l5[[i]], cluster = current_data$country_id,
                                   type = "HC1")

     l5.model_results[[i]] <- list(
        coefficients = coef(l5[[i]]),
        variance = l5.vcov_cluster[[i]],
        n = nrow(current_data)
       )

    
    l6[[i]] <- glm(t.rome~
                       sidea.no.force.1945
                   + sideb.no.force.1945
                   + my.mil.cap.lag
                   + v2x_libdem.lag
                   + log(gdp.cap.lag)
                   + log(pop.lag)
                   + civ.war.1990
                   + civ.war.1990*v2x_libdem.lag
                   + common.law
                   + t1.rome + t1.rome2 + t1.rome3
                  ,data=current_data,
                   family="binomial")
   
    l6.fitted[[i]] <- as.data.frame(l6[[i]]$fitted.values)
    colnames(l6.fitted[[i]]) <- "fitted.values"
    l6.fitted[[i]]$row.num <- rownames(l6.fitted[[i]])

    l6.vcov_cluster[[i]] <- vcovCL(l6[[i]], cluster = current_data$country_id,
                                   type = "HC1")

     l6.model_results[[i]] <- list(
        coefficients = coef(l6[[i]]),
        variance = l6.vcov_cluster[[i]],
        n = nrow(current_data)
       )

}


l1.pooled_results <- MIcombine(
    results = lapply(l1.model_results, function(x) x$coefficients),
    variances = lapply(l1.model_results, function(x) x$variance)
)

l2.pooled_results <- MIcombine(
    results = lapply(l2.model_results, function(x) x$coefficients),
    variances = lapply(l2.model_results, function(x) x$variance)
)

l3.pooled_results <- MIcombine(
    results = lapply(l3.model_results, function(x) x$coefficients),
    variances = lapply(l3.model_results, function(x) x$variance)
)

l4.pooled_results <- MIcombine(
    results = lapply(l4.model_results, function(x) x$coefficients),
    variances = lapply(l4.model_results, function(x) x$variance)
)

l5.pooled_results <- MIcombine(
    results = lapply(l5.model_results, function(x) x$coefficients),
    variances = lapply(l5.model_results, function(x) x$variance)
)

l6.pooled_results <- MIcombine(
    results = lapply(l6.model_results, function(x) x$coefficients),
    variances = lapply(l6.model_results, function(x) x$variance)
)



# View final results
summary(l1.pooled_results)
summary(l2.pooled_results)
summary(l3.pooled_results)
summary(l4.pooled_results)
summary(l5.pooled_results)
summary(l6.pooled_results)


coef.l1 <- l1.pooled_results$coefficients
se.l1 <- sqrt(diag(l1.pooled_results$variance))
z.l1 <- coef.l1/se.l1
p.l1 <- 2*pnorm(-abs(z.l1))
conf.l1 <- confint(l1.pooled_results)
m.l1 <- cbind(coef.l1,se.l1,p.l1,conf.l1)

coef.l2 <- l2.pooled_results$coefficients
se.l2 <- sqrt(diag(l2.pooled_results$variance))
z.l2 <- coef.l2/se.l2
p.l2 <- 2*pnorm(-abs(z.l2))
conf.l2 <- confint(l2.pooled_results)
m.l2 <- cbind(coef.l2,se.l2,p.l2,conf.l2)

coef.l3 <- l3.pooled_results$coefficients
se.l3 <- sqrt(diag(l3.pooled_results$variance))
z.l3 <- coef.l3/se.l3
p.l3 <- 2*pnorm(-abs(z.l3))
conf.l3 <- confint(l3.pooled_results)
m.l3 <- cbind(coef.l3,se.l3,p.l3,conf.l3)

coef.l4 <- l4.pooled_results$coefficients
se.l4 <- sqrt(diag(l4.pooled_results$variance))
z.l4 <- coef.l4/se.l4
p.l4 <- 2*pnorm(-abs(z.l4))
conf.l4 <- confint(l4.pooled_results)
m.l4 <- cbind(coef.l4,se.l4,p.l4,conf.l4)

coef.l5 <- l5.pooled_results$coefficients
se.l5 <- sqrt(diag(l5.pooled_results$variance))
z.l5 <- coef.l5/se.l5
p.l5 <- 2*pnorm(-abs(z.l5))
conf.l5 <- confint(l5.pooled_results)
m.l5 <- cbind(coef.l5,se.l5,p.l5,conf.l5)

coef.l6 <- l6.pooled_results$coefficients
se.l6 <- sqrt(diag(l6.pooled_results$variance))
z.l6 <- coef.l6/se.l6
p.l6 <- 2*pnorm(-abs(z.l6))
conf.l6 <- confint(l6.pooled_results)
m.l6 <- cbind(coef.l6,se.l6,p.l6,conf.l6)

m.l1 <- m.l1[c(1:8,13,9:12),]
m.l2 <- m.l2[c(1:8,13,9:12),]
m.l3 <- m.l3[c(1:8,13,9:12),]
m.l4 <- m.l4[c(1:8,13,9:12),]
m.l5 <- m.l5[c(1:8,13,9:12),]
m.l6 <- m.l6[c(1:8,13,9:12),]


rownames(m.l1) <- rownames(m.l2) <- rownames(m.l3) <-
    rownames(m.l4) <- rownames(m.l5) <- rownames(m.l6) <- 
    c("(Intercept)",
      "MID side A",
      "MID side B",
      "Milex per cap",
      "Polyarchy",
      "GDP cap (log)",
      "Population (log)",
      "Recent civil war",
      "Recent civil war*Polyarchy",
      "Common law",
      "Time",
      "Time2",
      "Time3")

colnames(m.l1) <- colnames(m.l2) <- colnames(m.l3) <-
    colnames(m.l4) <- colnames(m.l5) <- colnames(m.l6) <-
    c("b","std.err","p","X2.5.","X97.5.")

t.l1 <- data.frame(round(m.l1,3))
t.l1$Variables <- rownames(m.l1)
rownames(t.l1) <- NULL
t.l1$type <- "Overall,\nfrom 1991"

t.l2 <- data.frame(round(m.l2,3))
t.l2$Variables <- rownames(m.l2)
rownames(t.l2) <- NULL
t.l2$type <- "Overall,\nfrom 1945"

t.l3 <- data.frame(round(m.l3,3))
t.l3$Variables <- rownames(m.l3)
rownames(t.l3) <- NULL
t.l3$type <- "Force,\nfrom 1991"

t.l4 <- data.frame(round(m.l4,3))
t.l4$Variables <- rownames(m.l4)
rownames(t.l4) <- NULL
t.l4$type <- "Force,\nfrom 1945"

t.l5 <- data.frame(round(m.l5,3))
t.l5$Variables <- rownames(m.l5)
rownames(t.l5) <- NULL
t.l5$type <- "No force,\nfrom 1991"

t.l6 <- data.frame(round(m.l6,3))
t.l6$Variables <- rownames(m.l6)
rownames(t.l6) <- NULL
t.l6$type <- "No force,\nfrom 1945"

l.combined <- rbind(t.l1,t.l2,t.l3,t.l4,t.l5,t.l6)


l.combined$Variables.factor <- factor(l.combined$Variable,
                                      levels=c("Time3",
                                          "Time2",
                                          "Time",
                                          "Common law",
                                          "Recent civil war*Polyarchy",
                                          "Recent civil war",
                                          "Population (log)",
                                          "GDP cap (log)",
                                          "Polyarchy",
                                          "Milex per cap",
                                          "MID side B",
                                          "MID side A",
                                          "(Intercept)"
                                      ),
                                      ordered=TRUE)

l.combined$type.factor <- factor(l.combined$type,
                                 levels=c("No force,\nfrom 1945",
                                          "Force,\nfrom 1945",
                                          "Overall,\nfrom 1945",
                                          "No force,\nfrom 1991",
                                          "Force,\nfrom 1991",
                                          "Overall,\nfrom 1991"),
                                 ordered=TRUE)

## http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

## png(file="gc-jg-project/kampala/paper/figures/models-with-polyarchy.png",width=1440,height=960) #height=720
rome.plot <- ggplot(l.combined, aes(color=type.factor)) +
    geom_hline(yintercept=0, color=gray(1/2), lty=2) +
#    geom_linerange(aes(x=Variables, ymin=X2.5., ymax=X97.5.),
#                       lwd=1, position=position_dodge(width = 1/2)) +
    geom_pointrange(aes(x=Variables.factor, y=b, ymin=X2.5., ymax=X97.5.),
                    lwd=3/4, position=position_dodge(width = 1/2),
                    shape=21, fill="WHITE") +
#    scale_color_brewer(palette="Set1") +
    scale_color_manual(values=cbbPalette) +
    scale_y_continuous(limits=c(-7,1),breaks=seq(-7,1,1)) + #name="SATT",
    coord_flip() +
    theme_bw() +
#    ggtitle("") +
    theme(axis.text.x = element_text(size = 25),#angle=45),
          axis.text.y = element_text(size = 25),
          axis.title.x = element_blank(),
#        axis.title.x = element_text("b", size=16),
          axis.title.y = element_blank(),
#          axis.title.y = element_text(size=16),
#          legend.title=element_blank(),
          legend.text=element_text(size=15),
          legend.key.size=unit(.3, "in"),
          legend.key.width=unit(1, "in"),
          legend.title=element_text(size = 15, face="bold"),
          legend.position="bottom",
          plot.title = element_text(lineheight=.8, face="bold",size=40)) +
    labs(color = "MID track record\n") +
    guides(color=guide_legend(nrow=2,byrow=TRUE,reverse=TRUE))
## dev.off()


ggsave("gc-jg-project/kampala/paper/figures/app-figure-a11.png", plot = rome.plot, width = 14, height = 10.5, device = "png")




# Convert all elements to numeric matrices (handles various input types)
l1.fitted_clean <- lapply(l1.fitted, function(x) {
  # First convert to matrix if it isn't one
  if (!is.matrix(x)) x <- as.matrix(x)
  # Then ensure numeric values
  matrix(as.numeric(x), nrow = nrow(x), ncol = ncol(x))
})

# Convert all elements to numeric matrices (handles various input types)
l2.fitted_clean <- lapply(l2.fitted, function(x) {
  # First convert to matrix if it isn't one
  if (!is.matrix(x)) x <- as.matrix(x)
  # Then ensure numeric values
  matrix(as.numeric(x), nrow = nrow(x), ncol = ncol(x))
})

# Convert all elements to numeric matrices (handles various input types)
l3.fitted_clean <- lapply(l3.fitted, function(x) {
  # First convert to matrix if it isn't one
  if (!is.matrix(x)) x <- as.matrix(x)
  # Then ensure numeric values
  matrix(as.numeric(x), nrow = nrow(x), ncol = ncol(x))
})

# Convert all elements to numeric matrices (handles various input types)
l4.fitted_clean <- lapply(l4.fitted, function(x) {
  # First convert to matrix if it isn't one
  if (!is.matrix(x)) x <- as.matrix(x)
  # Then ensure numeric values
  matrix(as.numeric(x), nrow = nrow(x), ncol = ncol(x))
})

# Convert all elements to numeric matrices (handles various input types)
l5.fitted_clean <- lapply(l5.fitted, function(x) {
  # First convert to matrix if it isn't one
  if (!is.matrix(x)) x <- as.matrix(x)
  # Then ensure numeric values
  matrix(as.numeric(x), nrow = nrow(x), ncol = ncol(x))
})

# Convert all elements to numeric matrices (handles various input types)
l6.fitted_clean <- lapply(l6.fitted, function(x) {
  # First convert to matrix if it isn't one
  if (!is.matrix(x)) x <- as.matrix(x)
  # Then ensure numeric values
  matrix(as.numeric(x), nrow = nrow(x), ncol = ncol(x))
})

l1.fitted.mean <- as.data.frame(Reduce("+", l1.fitted_clean) / length(l1.fitted_clean))
l2.fitted.mean <- as.data.frame(Reduce("+", l2.fitted_clean) / length(l2.fitted_clean))
l3.fitted.mean <- as.data.frame(Reduce("+", l3.fitted_clean) / length(l3.fitted_clean))
l4.fitted.mean <- as.data.frame(Reduce("+", l4.fitted_clean) / length(l4.fitted_clean))
l5.fitted.mean <- as.data.frame(Reduce("+", l5.fitted_clean) / length(l5.fitted_clean))
l6.fitted.mean <- as.data.frame(Reduce("+", l6.fitted_clean) / length(l6.fitted_clean))


Data.l1 <- merge(Data,l1.fitted.mean,by.x="row.num",by.y="V2",all.x=TRUE)
Data.l1 <- Data.l1[order(Data.l1$country_name,Data.l1$year),]
Data.l1 <- Data.l1[,c("country_name","year","V1")]
Data.l1 <- aggregate(V1 ~ country_name, data=Data.l1, FUN=mean)
colnames(Data.l1) <- c("country_name","fitted.l1")

Data.l2 <- merge(Data,l2.fitted.mean,by.x="row.num",by.y="V2",all.x=TRUE)
Data.l2 <- Data.l2[order(Data.l2$country_name,Data.l2$year),]
Data.l2 <- Data.l2[,c("country_name","year","V1")]
Data.l2 <- aggregate(V1 ~ country_name, data=Data.l2, FUN=mean)
colnames(Data.l2) <- c("country_name","fitted.l2")

Data.l3 <- merge(Data,l3.fitted.mean,by.x="row.num",by.y="V2",all.x=TRUE)
Data.l3 <- Data.l3[order(Data.l3$country_name,Data.l3$year),]
Data.l3 <- Data.l3[,c("country_name","year","V1")]
Data.l3 <- aggregate(V1 ~ country_name, data=Data.l3, FUN=mean)
colnames(Data.l3) <- c("country_name","fitted.l3")

Data.l4 <- merge(Data,l4.fitted.mean,by.x="row.num",by.y="V2",all.x=TRUE)
Data.l4 <- Data.l4[order(Data.l4$country_name,Data.l4$year),]
Data.l4 <- Data.l4[,c("country_name","year","V1")]
Data.l4 <- aggregate(V1 ~ country_name, data=Data.l4, FUN=mean)
colnames(Data.l4) <- c("country_name","fitted.l4")

Data.l5 <- merge(Data,l5.fitted.mean,by.x="row.num",by.y="V2",all.x=TRUE)
Data.l5 <- Data.l5[order(Data.l5$country_name,Data.l5$year),]
Data.l5 <- Data.l5[,c("country_name","year","V1")]
Data.l5 <- aggregate(V1 ~ country_name, data=Data.l5, FUN=mean)
colnames(Data.l5) <- c("country_name","fitted.l5")

Data.l6 <- merge(Data,l6.fitted.mean,by.x="row.num",by.y="V2",all.x=TRUE)
Data.l6 <- Data.l6[order(Data.l6$country_name,Data.l6$year),]
Data.l6 <- Data.l6[,c("country_name","year","V1")]
Data.l6 <- aggregate(V1 ~ country_name, data=Data.l6, FUN=mean)
colnames(Data.l6) <- c("country_name","fitted.l6")


Data.icc.l1 <- merge(Data.icc,Data.l1,
                     by.x="country_name",by.y="country_name",
                     all.x=TRUE)

Data.icc.l2 <- merge(Data.icc.l1,Data.l2,
                     by.x="country_name",by.y="country_name",
                     all.x=TRUE)

Data.icc.l3 <- merge(Data.icc.l2,Data.l3,
                     by.x="country_name",by.y="country_name",
                     all.x=TRUE)

Data.icc.l4 <- merge(Data.icc.l3,Data.l4,
                     by.x="country_name",by.y="country_name",
                     all.x=TRUE)

Data.icc.l5 <- merge(Data.icc.l4,Data.l5,
                     by.x="country_name",by.y="country_name",
                     all.x=TRUE)

Data.icc.l6 <- merge(Data.icc.l5,Data.l6,
                     by.x="country_name",by.y="country_name",
                     all.x=TRUE)

Data.icc.l6$fitted.l1 <- Data.icc.l6$fitted.l1*100
Data.icc.l6$fitted.l2 <- Data.icc.l6$fitted.l2*100
Data.icc.l6$fitted.l3 <- Data.icc.l6$fitted.l3*100
Data.icc.l6$fitted.l4 <- Data.icc.l6$fitted.l4*100
Data.icc.l6$fitted.l5 <- Data.icc.l6$fitted.l5*100
Data.icc.l6$fitted.l6 <- Data.icc.l6$fitted.l6*100


imp1 <- mice(Data.icc.l6[,c("kampala",
                 "sidea.1991",
                 "sideb.1991",
                 "my.mil.cap.lag",
                 "v2x_polyarchy.lag",
                 "v2x_libdem.lag",     
                 "v2x_partipdem.lag",
                 "v2x_delibdem.lag", 
                 "v2x_egaldem.lag",
                 "gdp.cap.lag",
                 "pop.lag",
                 "common.law",
                 "year",
                 "rule.law.lag",
                 "aid.recipient", ## this is the lag variable, see 19-variables.R
                 "aid.recipient.q", ## this is the lag variable, see 19-variables.R
                 "All_NGO.lag",
                 "fitted.l1",
                 "country_id")], m = m, maxit = 10, seed = 3210)

models <- list()
model_results <- list()
merged_imputations <- list()
m1.model_results <- list()
m2.model_results <- list()
m3.model_results <- list()
m4.model_results <- list()
m5.model_results <- list()
m6.model_results <- list()

for (i in 1:m) {
    complete_data <- complete(imp1, i)

    complete_data$.imp <- i
    
    merged_data <- merge(complete_data, 
                         Data.icc.l6[,c("country_id","year","country_name",
                                 "kampala.t0","kampala.t1",
                                 "sidea.1945",
                                 "sideb.1945",
                                 "sidea.force.1945",
                                 "sideb.force.1945",
                                 "sidea.no.force.1945",
                                 "sideb.no.force.1945",
                                 "sidea.force.1991",
                                 "sideb.force.1991",
                                 "sidea.no.force.1991",
                                 "sideb.no.force.1991",
                                 "fitted.l2",
                                 "fitted.l3",
                                 "fitted.l4",
                                 "fitted.l5",
                                 "fitted.l6"
                                 )], 
                        by = c("country_id", "year"),  # Note: quoted variable names
                        all.x = TRUE)  # Keep all rows from complete_data
    
    merged_imputations[[i]] <- merged_data
        

    current_data <- merged_imputations[[i]]

    m1 <- coxph(Surv(kampala.t0,kampala.t1,kampala)~
              sidea.1991
            + sideb.1991
            + my.mil.cap.lag
            + v2x_libdem.lag
            + log(gdp.cap.lag)
            + log(pop.lag)
            + common.law
            + fitted.l1
            + cluster(country_id)
           ,data=current_data)

     m1.vcov_cluster <- vcovCL(m1, cluster = current_data$country_id)

     m1.model_results[[i]] <- list(
        coefficients = coef(m1),
        variance = m1.vcov_cluster,
        n = nrow(current_data)
       )
        
    m2 <- coxph(Surv(kampala.t0,kampala.t1,kampala)~
                sidea.1945
            + sideb.1945
            + my.mil.cap.lag
            + v2x_libdem.lag
            + log(gdp.cap.lag)
            + log(pop.lag)
            + common.law
            + fitted.l2
            + cluster(country_name)
           ,data=current_data)

    m2.vcov_cluster <- vcovCL(m2, cluster = current_data$country_id)

    m2.model_results[[i]] <- list(
        coefficients = coef(m2),
        variance = m2.vcov_cluster,
        n = nrow(current_data)
       )
    

    m3 <- coxph(Surv(kampala.t0,kampala.t1,kampala)~
                    sidea.force.1991
                + sideb.force.1991
                + my.mil.cap.lag
                + v2x_libdem.lag
                + log(gdp.cap.lag)
                + log(pop.lag)
                + common.law
                + fitted.l3
                + cluster(country_name)
               ,data=current_data)

    m3.vcov_cluster <- vcovCL(m3, cluster = current_data$country_id)

    m3.model_results[[i]] <- list(
        coefficients = coef(m3),
        variance = m3.vcov_cluster,
        n = nrow(current_data)
       )
    

    m4 <- coxph(Surv(kampala.t0,kampala.t1,kampala)~
                    sidea.force.1945
            + sideb.force.1945
            + my.mil.cap.lag
            + v2x_libdem.lag
            + log(gdp.cap.lag)
            + log(pop.lag)
            + common.law
            + fitted.l4
            + cluster(country_name)
           ,data=current_data)

    m4.vcov_cluster <- vcovCL(m4, cluster = current_data$country_id)

    m4.model_results[[i]] <- list(
        coefficients = coef(m4),
        variance = m4.vcov_cluster,
        n = nrow(current_data)
       )
    

    m5 <- coxph(Surv(kampala.t0,kampala.t1,kampala)~
                    sidea.no.force.1991
                + sideb.no.force.1991
                + my.mil.cap.lag
                + v2x_libdem.lag
                + log(gdp.cap.lag)
                + log(pop.lag)
                + common.law
                + fitted.l5
                + cluster(country_name)
               ,data=current_data)
    
    m5.vcov_cluster <- vcovCL(m5, cluster = current_data$country_id)

     m5.model_results[[i]] <- list(
        coefficients = coef(m5),
        variance = m5.vcov_cluster,
        n = nrow(current_data)
       )
    

    m6 <- coxph(Surv(kampala.t0,kampala.t1,kampala)~
                    sidea.no.force.1945
                + sideb.no.force.1945
                + my.mil.cap.lag
                + v2x_libdem.lag
                + log(gdp.cap.lag)
                + log(pop.lag)
                + common.law
                + fitted.l6
                + cluster(country_name)
               ,data=current_data)
    
    m6.vcov_cluster <- vcovCL(m6, cluster = current_data$country_id)

    m6.model_results[[i]] <- list(
        coefficients = coef(m6),
        variance = m6.vcov_cluster,
        n = nrow(current_data)
       )
}


m1.pooled_results <- MIcombine(
    results = lapply(m1.model_results, function(x) x$coefficients),
    variances = lapply(m1.model_results, function(x) x$variance)
)

m2.pooled_results <- MIcombine(
    results = lapply(m2.model_results, function(x) x$coefficients),
    variances = lapply(m2.model_results, function(x) x$variance)
)

m3.pooled_results <- MIcombine(
    results = lapply(m3.model_results, function(x) x$coefficients),
    variances = lapply(m3.model_results, function(x) x$variance)
)

m4.pooled_results <- MIcombine(
    results = lapply(m4.model_results, function(x) x$coefficients),
    variances = lapply(m4.model_results, function(x) x$variance)
)

m5.pooled_results <- MIcombine(
    results = lapply(m5.model_results, function(x) x$coefficients),
    variances = lapply(m5.model_results, function(x) x$variance)
)

m6.pooled_results <- MIcombine(
    results = lapply(m6.model_results, function(x) x$coefficients),
    variances = lapply(m6.model_results, function(x) x$variance)
)



# View final results
summary(m1.pooled_results)
summary(m2.pooled_results)
summary(m3.pooled_results)
summary(m4.pooled_results)
summary(m5.pooled_results)
summary(m6.pooled_results)


coef.m1 <- m1.pooled_results$coefficients
se.m1 <- sqrt(diag(m1.pooled_results$variance))
z.m1 <- coef.m1/se.m1
p.m1 <- 2*pnorm(-abs(z.m1))
conf.m1 <- confint(m1.pooled_results)
m.m1 <- cbind(coef.m1,se.m1,p.m1,conf.m1)

coef.m2 <- m2.pooled_results$coefficients
se.m2 <- sqrt(diag(m2.pooled_results$variance))
z.m2 <- coef.m2/se.m2
p.m2 <- 2*pnorm(-abs(z.m2))
conf.m2 <- confint(m2.pooled_results)
m.m2 <- cbind(coef.m2,se.m2,p.m2,conf.m2)

coef.m3 <- m3.pooled_results$coefficients
se.m3 <- sqrt(diag(m3.pooled_results$variance))
z.m3 <- coef.m3/se.m3
p.m3 <- 2*pnorm(-abs(z.m3))
conf.m3 <- confint(m3.pooled_results)
m.m3 <- cbind(coef.m3,se.m3,p.m3,conf.m3)

coef.m4 <- m4.pooled_results$coefficients
se.m4 <- sqrt(diag(m4.pooled_results$variance))
z.m4 <- coef.m4/se.m4
p.m4 <- 2*pnorm(-abs(z.m4))
conf.m4 <- confint(m4.pooled_results)
m.m4 <- cbind(coef.m4,se.m4,p.m4,conf.m4)

coef.m5 <- m5.pooled_results$coefficients
se.m5 <- sqrt(diag(m5.pooled_results$variance))
z.m5 <- coef.m5/se.m5
p.m5 <- 2*pnorm(-abs(z.m5))
conf.m5 <- confint(m5.pooled_results)
m.m5 <- cbind(coef.m5,se.m5,p.m5,conf.m5)

coef.m6 <- m6.pooled_results$coefficients
se.m6 <- sqrt(diag(m6.pooled_results$variance))
z.m6 <- coef.m6/se.m6
p.m6 <- 2*pnorm(-abs(z.m6))
conf.m6 <- confint(m6.pooled_results)
m.m6 <- cbind(coef.m6,se.m6,p.m6,conf.m6)

                 

rownames(m.m1) <- rownames(m.m2) <- rownames(m.m3) <-
    rownames(m.m4) <- rownames(m.m5) <- rownames(m.m6) <- 
    c("MID side A",
      "MID side B",
      "Milex per cap",
      "Polyarchy",
      "GDP cap (log)",
      "Population (log)",
      "Common law",
      "Fitted values"
      )

colnames(m.m1) <- colnames(m.m2) <- colnames(m.m3) <-
    colnames(m.m4) <- colnames(m.m5) <- colnames(m.m6) <-
    c("b","std.err","p","X2.5.","X97.5.")

t.m1 <- data.frame(round(m.m1,3))
t.m1$Variables <- rownames(m.m1)
rownames(t.m1) <- NULL
t.m1$type <- "Overall,\nfrom 1991"

t.m2 <- data.frame(round(m.m2,3))
t.m2$Variables <- rownames(m.m2)
rownames(t.m2) <- NULL
t.m2$type <- "Overall,\nfrom 1945"

t.m3 <- data.frame(round(m.m3,3))
t.m3$Variables <- rownames(m.m3)
rownames(t.m3) <- NULL
t.m3$type <- "Force,\nfrom 1991"

t.m4 <- data.frame(round(m.m4,3))
t.m4$Variables <- rownames(m.m4)
rownames(t.m4) <- NULL
t.m4$type <- "Force,\nfrom 1945"

t.m5 <- data.frame(round(m.m5,3))
t.m5$Variables <- rownames(m.m5)
rownames(t.m5) <- NULL
t.m5$type <- "No force,\nfrom 1991"

t.m6 <- data.frame(round(m.m6,3))
t.m6$Variables <- rownames(m.m6)
rownames(t.m6) <- NULL
t.m6$type <- "No force,\nfrom 1945"

t.combined <- rbind(t.m1,t.m2,t.m3,t.m4,t.m5,t.m6)

## "Population (log)",
## "MID side B",      
## "Common law",      
## "GDP cap (log)",   
## "Polyarchy",       
## "MID side A"

t.combined$Variables.factor <- factor(t.combined$Variable,
                                      levels=c("Fitted values",
                                               "Common law",
                                               "Population (log)",
                                               "GDP cap (log)",
                                               "Polyarchy",
                                               "Milex per cap",
                                               "MID side B",
                                               "MID side A"
                                               ),
                                      ordered=TRUE)

t.combined$type.factor <- factor(t.combined$type,
                                 levels=c("No force,\nfrom 1945",
                                          "Force,\nfrom 1945",
                                          "Overall,\nfrom 1945",
                                          "No force,\nfrom 1991",
                                          "Force,\nfrom 1991",
                                          "Overall,\nfrom 1991"),
                                 ordered=TRUE)

## http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

## FIGURE_WITH_FITTED_VALUES
fitted.plot <- ggplot(t.combined, aes(color=type.factor)) +
    geom_hline(yintercept=0, color=gray(1/2), lty=2) +
#    geom_linerange(aes(x=Variables, ymin=X2.5., ymax=X97.5.),
#                       lwd=1, position=position_dodge(width = 1/2)) +
    geom_pointrange(aes(x=Variables.factor, y=b, ymin=X2.5., ymax=X97.5.),
                    lwd=2, position=position_dodge(width = 3/5),
                    shape=21, fill="WHITE") +
#    scale_color_brewer(palette="Set1") +
    scale_color_manual(values=cbbPalette) +
    scale_y_continuous(limits=c(-5.3,2),breaks=seq(-5,2,1)) + #name="SATT",
    coord_flip() +
    theme_bw() +
#    ggtitle("") +
    theme(axis.text.x = element_text(size = 25),#angle=45),
          axis.text.y = element_text(size = 25),
          axis.title.x = element_blank(),
#        axis.title.x = element_text("b", size=16),
          axis.title.y = element_blank(),
#          axis.title.y = element_text(size=16),
#          legend.title=element_blank(),
          legend.text=element_text(size=18),
          legend.key.size=unit(.5, "in"),
          legend.key.width=unit(1, "in"),
          legend.title=element_text(size = 18, face="bold"),
          legend.position="bottom",
          plot.title = element_text(lineheight=.8, face="bold",size=40)) +
    labs(color = "MID track record\n") +
    guides(color=guide_legend(nrow=2,byrow=TRUE,reverse=TRUE))

ggsave("gc-jg-project/kampala/paper/figures/app-figure-a12.png", plot = fitted.plot, width = 14, height = 10.5, device = "png")



t.n.obs <- m1$n
t.n.fail <- m1$nevent
t.n.fail.tot <- table(Data$kampala)[2]
t.miss.vals <- dim(Data)[1]-t.n.obs
